Allocation

filedir <- "/mnt/schratt/tgermade_test/master_19_20/enrichMiR_benchmark/results3/"

# input files: Bartel DEA SE
input <- list(
  bartel.hek=paste0(filedir, "bartel.hek.benchmark.rds"),
  bartel.hek.exon=paste0(filedir, "bartel.hek.exon.benchmark.rds"),
  bartel.hela=paste0(filedir, "bartel.hela.benchmark.rds"),
  bartel.hela.exon=paste0(filedir, "bartel.hela.exon.benchmark.rds"),
  jeong=paste0(filedir, "jeong.benchmark.rds"),
  jeong.exon=paste0(filedir, "jeong.exon.benchmark.rds"),
  rat=paste0(filedir, "ratPolyA.benchmark.rds"),
  rat.exon=paste0(filedir, "ratPolyA.exon.benchmark.rds"),
  
  bartel.hek.mirexpr=paste0(filedir, "bartel.hek.mirexpr.benchmark.rds"),
  bartel.hek.exon.mirexpr=paste0(filedir, "bartel.hek.exon.mirexpr.benchmark.rds"),
  bartel.hela.mirexpr=paste0(filedir, "bartel.hela.mirexpr.benchmark.rds"),
  bartel.hela.exon.mirexpr=paste0(filedir, "bartel.hela.exon.mirexpr.benchmark.rds"),
  rat.mirexpr=paste0(filedir, "ratPolyA.mirexpr.benchmark.rds"),
  rat.exon.mirexpr=paste0(filedir, "ratPolyA.exon.mirexpr.benchmark.rds"),
  
  amin=paste0(filedir, "amin.benchmark.rds"),
  amin.exon=paste0(filedir, "amin.exon.benchmark.rds"),
  cherone.d0=paste0(filedir, "cherone.d0.benchmark.rds"),
  cherone.d0.exon=paste0(filedir, "cherone.d0.exon.benchmark.rds"),
  cherone.d1=paste0(filedir, "cherone.d1.benchmark.rds"),
  cherone.d1.exon=paste0(filedir, "cherone.d1.exon.benchmark.rds"),
  
  amin.mirexpr=paste0(filedir, "amin.mirexpr.benchmark.rds"),
  amin.exon.mirexpr=paste0(filedir, "amin.exon.mirexpr.benchmark.rds"),
  cherone.d0.mirexpr=paste0(filedir, "cherone.d0.mirexpr.benchmark2.rds"),
  cherone.d0.exon.mirexpr=paste0(filedir, "cherone.d0.exon.mirexpr.benchmark2.rds"),
  cherone.d1.mirexpr=paste0(filedir, "cherone.d1.mirexpr.benchmark2.rds"),
  cherone.d1.exon.mirexpr=paste0(filedir, "cherone.d1.exon.mirexpr.benchmark2.rds")
  )

TPs <- c( 
  let.7a = "GAGGUAG", lsy.6 = "UUUGUAU", miR.1 = "GGAAUGU", miR.124 = "AAGGCAC", 
  miR.137 = "UAUUGCU", miR.139 = "CUACAGU", miR.143 = "GAGAUGA", 
  miR.144 = "ACAGUAU", miR.153 = "UGCAUAG", miR.155 = "UAAUGCU", 
  miR.182 = "UUGGCAA", miR.199a = "CCAGUGU", miR.204 = "UCCCUUU", 
  miR.205 = "CCUUCAU", miR.216b = "AAUCUCU", miR.223 = "GUCAGUU", 
  miR.7 = "GGAAGAC", miR.122 = "GGAGUGU", miR.133 = "UGGUCCC", 
  miR.138 = "GCUGGUG", miR.145 = "UCCAGUU", miR.184 = "GGACGGA", 
  miR.190a = "GAUAUGU", miR.200b = "AAUACUG", miR.216a = "AAUCUCA", 
  miR.217 = "ACUGCAU", miR.219a = "GAUUGUC", miR.375 = "UUGUUCG", 
  miR.451a = "AACCGUU", "DKOvWT"="GCUACAU", "DKO"="GCUACAU", 
  "miR.138vNeg"="GCUGGUG", "miR.499vNeg"="UAAGACU", "miR.499"="UAAGACU", 
  "218DKOvWT"="UGUGCUU", "218DKO"="UGUGCUU",
  "138DKOvWT"="CUAUUUC", "138DKO"="CUAUUUC"
  )

Prep

# combine dataframes
df <- do.call("rbind", df.list)
# add information
df$sample <- sapply(strsplit(rownames(df),"\\."), function(x) paste(rev(rev(x)[-1]),collapse="."))
df$dataset <- sapply(strsplit(df$sample,"\\."), function(x) x[1])
df$organism <- sapply(df$dataset, function(x){
  if(x=="bartel" | x=="jeong") "human"
  else if(x=="rat") "rat"
  else if(x=="amin" | x=="cherone" | x=="whipple") "mouse"
})
df$celltype <- sapply(df$sample, function(x){
  if(grepl("hek",x)) "HEK293"
  else if(grepl("hela",x)) "HeLa"
  else if(grepl("jeong",x)) "SNU-638"
  else if(grepl("rat",x)) "hippocampal_neurons/glia"
  else if(grepl("in",x)) "induced_neurons"
  else if(grepl("esc",x)) "ESC"
  else if(grepl("amin",x)) "motoneurons"
  else if(grepl("cherone",x)) "CAD"
})
df$miRNA <- sapply(1:nrow(df), function(i){
  if(df$dataset[i]=="bartel") df$treatment[i]
  else if(df$dataset[i]=="rat" & grepl("miR.138",df$treatment[i])) "miR.138"
  else if(df$dataset[i]=="rat" & grepl("miR.499",df$treatment[i])) "miR.499"
  else if(df$dataset[i]=="jeong") "miR.221/miR.222"
  else if(df$dataset[i]=="amin") "miR.218"
  else if(df$dataset[i]=="cherone") "miR.138"
})
df$seed <- sapply(df$treatment, function(x) TPs[x])
df$design <- factor( sapply(df$dataset, function(x){
  if(x=="bartel" | x=="rat") "transfection"
  else if(x=="jeong" | x=="amin" | x=="cherone" | x=="whipple") "KO"
  }), levels=c("transfection","KO") )
df$exon.specific <- factor( sapply(df$sample, function(x) if(grepl("exon",x)) "exonic" else "unseparated"), levels=c("unseparated","exonic") )
df$mirexpr <- factor( sapply(df$sample, function(x) if(grepl("mirexpr",x)) "mirexpr" else "no_mirexpr"), levels=c("no_mirexpr","mirexpr") )

# we get rid of tests in the wrong direction with respect to intervention (mistake in amin dataset):
df <- df[ (df$design=="transfection" & !grepl("\\.up",df$method)) | 
          (df$design!="transfection" & df$dataset!="amin" & !grepl("\\.down",df$method)) |
          (df$dataset=="amin" & !grepl("\\.up",df$method)), ]
df$method <- gsub("\\.down|\\.up","",df$method)
# get rid of NAs in detPPV
df$detPPV[is.na(df$detPPV)] <- 0

# correct some treatment names
for(x in c("bartel","rat","cherone")){
  df$treatment[df$dataset==x & df$miRNA=="miR.138"] <- paste0("miR.138.",x)
}
df$treatment[df$dataset=="jeong"] <- "miR.221/miR.222"
df$treatment[df$treatment=="miR.499vNeg"] <- "miR.499"
df$treatment[df$treatment %in% c("218DKOvWT","218DKO")] <- "miR.218"

# mark bartel dataset
for(x in unique(df$dataset)){
  if(x=="bartel") df$is.bartel[df$dataset==x] <- "bartel" 
  else df$is.bartel[df$dataset==x] <- "other"
}

# remove lsy.6 bartel
df <- df[df$treatment!="lsy.6",]

# convert columns to factors
for(x in colnames(df)[-which(colnames(df) %in% c("detPPV","FP.atFDR05","log10QDiff","log10QrelIncrease","TP.atFDR05"))]){
  df[[x]] <- as.factor(df[[x]])
}

# variables
#dea.names <- as.character(unique(df$treatment))
#props.all <- levels(df$prop)

Plots

Test robustness to noise

VERSION 1:

Dataset difficulty

VERSION 1:

Dataset sensitivity & selectivity: FP vs TP (method & prop & dataset)

mirexpr Analysis

mirexpr Effect: FP vs TP (method & prop & mirexpr)

VERSION 3:

# FP-TP plot (at FDR .05)

## select only datasets that can be compared in terms of mirexpr (not all datasets are run with mirexpr)
df.sub <- df[df$dataset %in% df$dataset[df$mirexpr=="mirexpr"] & df$exon.specific=="unseparated",]

## get average number of FP & TP at FDR .05 for each combination of permutation & method & mirexpr
#df.agg <- aggregate(df.sub[,c("FP.atFDR05","TP.atFDR05")], by=df.sub[,c("prop","method","mirexpr","dataset","design")], FUN=mean)
df.agg <- aggregate(df.sub[,c("FP.atFDR05","TP.atFDR05")], by=df.sub[,c("method","mirexpr","dataset","design")], FUN=mean)

 
 library(ggrepel)
# ggplot(df.agg, aes(x=FP.atFDR05, y=TP.atFDR05, color=method, shape=prop, group=method)) + 
#   geom_line() + 
#   guides(color=FALSE) +
#   coord_cartesian(ylim = c(0,1)) +
#   geom_point(size=3) + 
#   geom_label_repel(data=df.agg %>% filter(prop=="original"), 
#                   aes(label=method), 
#                   force=10, 
#                   size=3,
#                   ylim=c(0,1),
#                   segment.color="black",
#                   segment.alpha=.3, 
#                   arrow=arrow(length = unit(0,"npc"), type = "open", ends = "last")
#                   ) +
#   geom_point() +
#   scale_color_manual(values = colors$method) +
#   facet_grid(design+dataset ~ mirexpr) + 
#   scale_x_sqrt() +
#   theme_cowplot() +
#   theme(panel.border=element_rect(colour = "grey", fill = NA),
#         panel.grid.major=element_line(colour = "grey"), 
#         axis.line=element_blank(),
#         strip.background=element_rect(colour = "grey90"),
#         axis.ticks=element_line(colour = "grey"))


# 
# ggplot(df.agg, aes(x=FP.atFDR05, y=TP.atFDR05, color=method)) + 
#   guides(color=FALSE) +
#   coord_cartesian(ylim = c(0,1)) +
#   geom_point(size=2.5) + 
#   geom_text_repel(data=df.agg, 
#                   aes(label=method), 
#                   force=10, 
#                   size=3.5,
#                   ylim=c(0,1),
#                   segment.color="black",
#                   segment.alpha=.15, 
#                   arrow=arrow(length = unit(0,"npc"), type = "open", ends = "last"),
#                   box.padding = unit(0.375, "lines"),
#                   point.padding = unit(.2, 'lines')
#                   ) +
#   geom_point() +
#   scale_color_manual(values = colors$method) +
#   facet_grid(design+dataset ~ mirexpr) + 
#   scale_x_sqrt() +
#   theme_cowplot() +
#   theme(panel.border=element_rect(colour = "grey", fill = NA),
#         panel.grid.major=element_line(colour = "grey"), 
#         axis.line=element_blank(),
#         strip.background=element_rect(colour = "grey90"),
#         axis.ticks=element_line(colour = "grey"))



ggplot(df.agg, aes(x=FP.atFDR05, y=TP.atFDR05, color=method)) + 
  guides() +
  coord_cartesian(ylim = c(0,1)) +
  geom_point(size=2.5) + 
  geom_label_repel(data=subset(df.agg, method %in% c("wEN","EN","michael","regmir","KS2","combGeom.3","aREAmir")), 
                  aes(label=method), 
                  force=15, 
                  size=3.5,
                  ylim=c(0.05,.9),
                  segment.color="black",
                  segment.alpha=.2, 
                  arrow=arrow(length = unit(0,"npc"), type = "open", ends = "last"),
                  box.padding = unit(0.5, "lines"),
                  point.padding = unit(.35, 'lines'),
                  nudge_x=3.5
                  ) +
  geom_point() +
  scale_color_manual(values = colors$method) +
  facet_grid(design+dataset ~ mirexpr) + 
  scale_x_sqrt() +
  theme_cowplot() +
  theme(panel.border=element_rect(colour = "grey35", fill = NA),
        panel.grid.major=element_line(colour = "grey85"), 
        axis.line=element_blank(),
        strip.background=element_rect(colour = "grey35"),
        axis.ticks=element_line(colour = "grey35"))

mirexpr Effect: box (method)

mirexpr FP & TP rates

VERSION 1:

# # TP heatmap
# mh <- function(df, metric="TP.atFDR05", form=treatment+dataset+mirexpr~prop, fn=mean, cluster_cols=FALSE, annotate=T, ...){
#   m <- reshape2::dcast(df, form, value.var=metric, na.rm=TRUE, fun.aggregate=fn)
#   m$treatment <- as.character(m$treatment)
#   row.names(m) <- m[,1]
#   m[,1] <- NULL
#   if(annotate) ann <- data.frame(dataset=m$dataset, row.names=rownames(m))
#   else ann <- NULL
#   m <- m[,-c(1,2)]
#   pheatmap::pheatmap(m, cluster_cols=cluster_cols, cluster_rows=FALSE, annotation_row=ann, annotation_names_row=FALSE, ...)
# }
# 
# df.sub <- df[df$exon.specific=="unseparated" & df$dataset!="jeong",]
# 
# p1a <- mh(df.sub[df.sub$dataset=="bartel" & df.sub$mirexpr=="mirexpr",], main="mirexpr", annotation_colors = colors,
#          annotate=TRUE, legend=FALSE, annotation_legend=FALSE, show_rownames=FALSE, breaks=seq(0.7,1,length.out = 101))$gtable
# p1b <- mh(df.sub[df.sub$dataset=="bartel" & df.sub$mirexpr!="mirexpr",], main="no_mirexpr", annotation_colors = colors, 
#          annotation_legend=FALSE, breaks=seq(0.7,1,length.out = 101))$gtable
# 
# p2a <- mh(df.sub[df.sub$dataset!="bartel" & df.sub$mirexpr=="mirexpr",], main="mirexpr", annotation_colors = colors,
#          annotate=TRUE, legend=FALSE, annotation_legend=FALSE, show_rownames=FALSE, breaks=seq(.2,1,length.out = 101))$gtable
# p2b <- mh(df.sub[df.sub$dataset!="bartel" & df.sub$mirexpr!="mirexpr",], main="no_mirexpr", annotation_colors = colors, 
#          annotation_legend=TRUE, breaks=seq(.2,1,length.out = 101))$gtable
# 
# gridExtra::grid.arrange(gridExtra::arrangeGrob(grobs=list(p1a,p1b),ncol=2, widths=c(5.6,6) ), 
#                         top=grid::textGrob("TP rate \n", gp=grid::gpar(fontsize=18, fontface="bold")) )
# gridExtra::grid.arrange(gridExtra::arrangeGrob(grobs=list(p2a,p2b),ncol=2, widths=c(4,6) ), 
#                         top=grid::textGrob("TP rate \n", gp=grid::gpar(fontsize=18, fontface="bold")) )




# create heatmap matrix
MHv3 <- function(df, metric="TP.atFDR05", form=treatment+dataset+sample+mirexpr~prop, fn=mean, data.annotate=FALSE, name=name, ...){
  # reshape data
  m2 <- reshape2::dcast(df, form=form, value.var=metric, na.rm=TRUE, fun.aggregate=fn)
  # allocation
  if(any(grepl("mirexpr",as.character(form)))){
    feature <- "mirexpr"
    lvls <- c("no_mirexpr","mirexpr")
  } else {
    feature <- "exon.specific"
    lvls <- c("unseparated","exonic")
  }
  # generate matrix & annotations
  l <- split(m2,m2[[feature]])
  l <- lapply(l, function(x) aggregate(x[,-c(1:4)], by=list(x[,1],x[,2]), FUN="mean"))
  m3 <- cbind(l[[lvls[1]]][,-2],l[[lvls[2]]][,-c(1:2)])
  ds <- as.character(l[[lvls[1]]]$Group.2)  
  ds.bartel <- sapply(ds, function(x) if(x=="bartel") "bartel" else "other")
  names(ds) <- names(ds.bartel) <- rownames(m3) <- m3$Group.1
  m3 <- m3[,-1]
  ds2 <- factor(c(rep(lvls[[1]],4),rep(lvls[[2]],4)), levels=lvls)
  if(any(grepl("mirexpr",as.character(form)))){
    ha <- HeatmapAnnotation(mir=ds2, col=colors, annotation_legend_param=list(title=feature),
                          show_annotation_name=FALSE)
  } else {
    ha <- HeatmapAnnotation(ex=ds2, col=colors, annotation_legend_param=list(title=feature),
                          show_annotation_name=FALSE)
  }
  # heatmap
  hm <- ComplexHeatmap::Heatmap(m3, name=name, cluster_columns=FALSE, rect_gp=gpar(col="white",lwd =.5),
                               column_split=ds2, 
                               row_split=factor(ds.bartel, levels=c("bartel","other")),
                               column_labels=rep(colnames(m3)[1:4],2), 
                               column_title_gp=gpar(fontsize=12), column_title=metric,
                               row_gap=unit(2,"mm"), column_gap=unit(2,"mm"), 
                               top_annotation=ha, ...)
  if(data.annotate){
    hm2 <- ComplexHeatmap::Heatmap(ds, name="dataset", col=colors$dataset, ...)
    return(hm2)
  } else {
    return(hm)
  }
}

# take subset
df.sub <- df[df$exon.specific=="unseparated" & df$dataset!="jeong",]
# generate color gradient for TP
hm.col1 <- circlize::colorRamp2(c(0.2,.6,1),c("#043495","#f4da71","#bd0f0f"), space="RGB")
hm.col2 <- circlize::colorRamp2(seq(5,50,length=3),c("#043495","white","#bd0f0f"), space="RGB")
# generate heatmaps
hm1 <- MHv3(df.sub, col=hm.col1, name="TP rate")
hm2 <- MHv3(df.sub, metric="FP.atFDR05", col=hm.col2, name="FP rate")
hm3 <- MHv3(df.sub, data.annotate=TRUE, show_row_names=TRUE, width = unit(5, "mm"))
# plot
draw(hm1 + hm2 + hm3, ht_gap = unit(c(.5,.2), "cm"), show_row_dend=FALSE)

VERSION 2:

## [1] "1.37e-06"
## [1] "5.45e-181"
##  bartel.hek bartel.hela         rat        amin     cherone 
##     "0.048"  "1.17e-10"     "0.302"     "0.398"         "1"
##  bartel.hek bartel.hela         rat        amin     cherone 
##  "1.05e-88" "4.55e-117"     "0.337"  "1.36e-14"  "1.41e-16"

exonic Analysis

Exon-separation effect: FP vs TP (method & prop & exon.specific)

VERSION 2:

# FP-TP plot (at FDR .05)

## get average number of FP & TP at FDR .05 for each combination of permutation & method & mirexpr
#df.agg <- aggregate(df[,c("FP.atFDR05","TP.atFDR05")], by=df[,c("prop","method","exon.specific","dataset","design")], FUN=mean)
df.agg <- aggregate(df[,c("FP.atFDR05","TP.atFDR05")], by=df[,c("method","exon.specific","dataset","design")], FUN=mean)


library(ggrepel)
# ggplot(df.agg, aes(x=FP.atFDR05, y=TP.atFDR05, color=method, shape=prop, group=method)) + 
#   geom_line() + 
#   guides(color=FALSE) +
#   coord_cartesian(ylim = c(0,1)) +
#   geom_point(size=3) + 
#   geom_label_repel(data=df.agg %>% filter(prop=="original"), 
#                   aes(label=method), 
#                   force=10, 
#                   size=3,
#                   ylim=c(0,1),
#                   segment.color="black",
#                   segment.alpha=.3, 
#                   arrow=arrow(length = unit(0,"npc"), type = "open", ends = "last")
#                   ) +
#   geom_point() +
#   scale_color_manual(values = colors$method) +
#   facet_grid(design+dataset ~ exon.specific) + 
#   scale_x_sqrt() +
#   theme_cowplot() +
#   theme(panel.border=element_rect(colour = "grey", fill = NA),
#         panel.grid.major=element_line(colour = "grey"), 
#         axis.line=element_blank(),
#         strip.background=element_rect(colour = "grey90"),
#         axis.ticks=element_line(colour = "grey"))



ggplot(df.agg, aes(x=FP.atFDR05, y=TP.atFDR05, color=method)) + 
  guides() +
  coord_cartesian(ylim = c(0,1)) +
  geom_point(size=2.5) + 
  geom_label_repel(data=subset(df.agg, method %in% c("wEN","EN","michael","regmir","KS2","combGeom.3","aREAmir")), 
                  aes(label=method), 
                  force=13, 
                  size=3.5,
                  ylim=c(0.05,.9),
                  segment.color="black",
                  segment.alpha=.2, 
                  arrow=arrow(length = unit(0,"npc"), type = "open", ends = "last"),
                  box.padding = unit(0.5, "lines"),
                  point.padding = unit(.35, 'lines'),
                  nudge_x=1
                  ) +
  geom_point() +
  scale_color_manual(values = colors$method) +
  facet_grid(design+dataset ~ exon.specific) + 
  scale_x_sqrt() +
  theme_cowplot() +
  theme(panel.border=element_rect(colour = "grey35", fill = NA),
        panel.grid.major=element_line(colour = "grey85"), 
        axis.line=element_blank(),
        strip.background=element_rect(colour = "grey35"),
        axis.ticks=element_line(colour = "grey35"))

exonic FP & TP rates

VERSION 1:

# TP heatmap
# mh <- function(df, metric="TP.atFDR05", form=treatment+dataset+mirexpr~prop, fn=mean, cluster_cols=FALSE, annotate=T, ...){
#   m <- reshape2::dcast(df, form, value.var=metric, na.rm=TRUE, fun.aggregate=fn)
#   m$treatment <- as.character(m$treatment)
#   row.names(m) <- m[,1]
#   m[,1] <- NULL
#   if(annotate) ann <- data.frame(dataset=m$dataset, row.names=rownames(m))
#   else ann <- NULL
#   m <- m[,-c(1,2)]
#   pheatmap::pheatmap(m, cluster_cols=cluster_cols, cluster_rows=FALSE, annotation_row=ann, annotation_names_row=FALSE, ...)
# }
# 
# df.sub <- df[df$mirexpr=="no_mirexpr",]

# p1a <- mh(df.sub[df.sub$dataset=="bartel" & df.sub$mirexpr=="mirexpr",], "detPPV", main="mirexpr", annotation_colors = colors,
#          annotate=TRUE, legend=FALSE, annotation_legend=FALSE, show_rownames=FALSE, breaks=seq(0.65,1,length.out = 101))$gtable
# p1b <- mh(df.sub[df.sub$dataset=="bartel" & df.sub$mirexpr!="mirexpr",], "detPPV", main="no_mirexpr", annotation_colors = colors, 
#          annotation_legend=FALSE, breaks=seq(0.65,1,length.out = 101))$gtable
# 
# p2a <- mh(df.sub[df.sub$dataset!="bartel" & df.sub$mirexpr=="mirexpr",], "detPPV", main="mirexpr", annotation_colors = colors,
#          annotate=TRUE, legend=FALSE, annotation_legend=FALSE, show_rownames=FALSE, breaks=NULL)$gtable
# p2b <- mh(df.sub[df.sub$dataset!="bartel" & df.sub$mirexpr!="mirexpr",], "detPPV", main="no_mirexpr", annotation_colors = colors, 
#          annotation_legend=TRUE, breaks=NULL)$gtable


# 
# p1a <- mh(df.sub[df.sub$dataset=="bartel" & df.sub$exon.specific=="exonic",], main="exonic", annotation_colors = colors,
#          annotate=TRUE, legend=FALSE, annotation_legend=FALSE, show_rownames=FALSE, breaks=seq(.2,1,length.out = 101))$gtable
# p1b <- mh(df.sub[df.sub$dataset=="bartel" & df.sub$exon.specific!="exonic",], main="unseparated", annotation_colors = colors, 
#          annotation_legend=FALSE, breaks=seq(0.2,1,length.out = 101))$gtable
# 
# p2a <- mh(df.sub[df.sub$dataset!="bartel" & df.sub$exon.specific=="exonic",], main="exonic", annotation_colors = colors,
#          annotate=TRUE, legend=FALSE, annotation_legend=FALSE, show_rownames=FALSE, breaks=seq(0,.9,length.out = 101))$gtable
# p2b <- mh(df.sub[df.sub$dataset!="bartel" & df.sub$exon.specific!="exonic",], main="unseparated", annotation_colors = colors, 
#          annotation_legend=TRUE, breaks=seq(0,.9,length.out = 101))$gtable
# 
# gridExtra::grid.arrange(gridExtra::arrangeGrob(grobs=list(p1a,p1b),ncol=2, widths=c(5.6,6) ), 
#                         top=grid::textGrob("TP rate \n", gp=grid::gpar(fontsize=18, fontface="bold")) )
# gridExtra::grid.arrange(gridExtra::arrangeGrob(grobs=list(p2a,p2b),ncol=2, widths=c(4,6) ), 
#                         top=grid::textGrob("TP rate \n", gp=grid::gpar(fontsize=18, fontface="bold")) )


# take subset
df.sub <- df[df$mirexpr=="no_mirexpr",]
# generate color gradient for TP
hm.col1 <- circlize::colorRamp2(c(0,.6,1),c("#043495","#f4da71","#bd0f0f"), space="RGB")
hm.col2 <- circlize::colorRamp2(seq(5,50,length=3),c("#043495","white","#bd0f0f"), space="RGB")
# generate heatmaps
hm1 <- MHv3(df.sub, col=hm.col1, name="TP rate", form=treatment+dataset+sample+exon.specific~prop)
hm2 <- MHv3(df.sub, metric="FP.atFDR05", col=hm.col2, name="FP rate", form=treatment+dataset+sample+exon.specific~prop)
hm3 <- MHv3(df.sub, data.annotate=TRUE, show_row_names=TRUE, width = unit(5, "mm"), form=treatment+dataset+sample+exon.specific~prop)
# plot
draw(hm1 + hm2 + hm3, ht_gap = unit(c(.5,.2), "cm"), show_row_dend=FALSE)

VERSION 2:

## [1] "1.01e-171"
## [1] "1.71e-187"
##  bartel.hek bartel.hela       jeong         rat        amin     cherone 
## "2.42e-112"  "3.81e-34"     "0.171"  "2.62e-93"     "0.019"         "1"
##  bartel.hek bartel.hela       jeong         rat        amin     cherone 
## "3.27e-107"  "2.10e-83"  "6.09e-24"  "3.67e-09"  "4.39e-14"  "3.65e-09"

Score distributions: curve (method & prop)

Plots

for(i in seq_along(input)){
  # title
  cat("## ", names(input)[i],"\n\n")
  df <- readRDS(input[[i]])
  # score plots
  for(j in c("detPPV","FP.atFDR05","log10QDiff","TP.atFDR05")){
    if(j!="TP.atFDR05"){
      ## density plot: score distribution per permutation
      cat("\n\n")
      cat("### ","Scores density plot: ",j,"\n\n")
      print( 
        ggplot(df, aes(log10(df[[j]]))) + geom_density(aes(fill=method), alpha=0.9)  + facet_wrap(~prop) + xlab(j)
        )
      ## violin plot
      if(!j %in% c("FP.atFDR05","log10QDiff")){
        cat("\n\n")
        cat("### ","Scores violin plot: ",j,"\n\n")
        print( 
          ggplot(df, aes(x=prop, y=df[[j]], fill=method))  + geom_violin() + facet_wrap(~method) +
            guides(fill=FALSE) + ylab(j) + geom_jitter(shape=16, position=position_jitter(0.2))
        )
      }
    }
    ## boxplot FP
    if(j=="FP.atFDR05"){
      cat("\n\n")
      cat("### ","Scores boxplot: ",j,"\n\n")
      print(
        ggplot(df, aes(x=method, y=df[[j]], fill=method)) + geom_boxplot() +
              guides(fill=FALSE) + ylab(j) + coord_flip() + scale_x_discrete(limits = rev(levels(df$method)))
            )
    }
    ## violin plot median
    cat("\n\n")
    cat("### ","Scores violin plot: ",j,"\n\n")
    print(
     ggplot(df, aes(x=prop, y=df[[j]], fill=method))  + geom_violin(draw_quantiles = 0.5) + facet_wrap(~method) +
      guides(fill=FALSE) + ylab(j)
    ) 
    ## mean curves plot
    if(i!="FP.atFDR05"){
      cat("\n\n")
      cat("### ","Scores smoothed means: ",j,"\n\n")
      print(
        ggplot(df, aes(x=as.integer(df$prop), y=df[[j]], color=method)) + geom_smooth(alpha=.1) + scale_x_continuous(labels=unique(props.all)) + 
          ylab(j) + xlab("prop")
        )
    }
  }
  cat("\n\n")
  cat("### ","FP-TP plot: ",names(input)[i],"\n\n")
  # FP-TP plot (at FDR .05)
  ## get average number of FP & TP at FDR .05 for each combination of permutation & method
  df.agg <- aggregate(df[,c("FP.atFDR05","TP.atFDR05")], by=df[,c("prop","method")], FUN=mean)
  ## plot
  print(
    ggplot(df.agg, aes(x=FP.atFDR05, y=TP.atFDR05, color=method, shape=prop, group=method)) + 
    geom_line() + 
    geom_point(size=3) + 
    geom_label_repel(data=df.agg %>% filter(prop==50), 
                    aes(label=method), 
                    force=5, 
                    box.padding = unit(.75, "lines"),
                    segment.color="black",
                    segment.alpha=.2, 
                    arrow=arrow(length = unit(.01,"npc"), type = "open", ends = "last")
                    ) +
    geom_point() +
    theme_classic(base_size = 15)
  )
  
  cat("\n\n")
}


# # plots
# for(i in c("detPPV","FP.atFDR05","log10QDiff","TP.atFDR05")){
#   cat("Score analysis: ", i,"\n\n")
#   ## boxplot
#   # print( 
#   #   ggplot(df, aes(x=prop, y=df[[i]], fill=method)) + geom_boxplot() + ylab(i) 
#   #   )
#   if(i!="TP.atFDR05"){
#     ## density plot: score distribution per permutation
#     print( 
#       ggplot(df, aes(log10(df[[i]]))) + geom_density(aes(fill=method), alpha=0.9)  + facet_wrap(~prop) + xlab(i)
#       )
#     ## violin plot
#     if(!i %in% c("FP.atFDR05","log10QDiff")){
#       print( 
#         ggplot(df, aes(x=prop, y=df[[i]], fill=method))  + geom_violin() + facet_wrap(~method) +
#           guides(fill=FALSE) + ylab(i) + geom_jitter(shape=16, position=position_jitter(0.2))
#       )
#     }
#   }
#   ## boxplot FP
#   if(i=="FP.atFDR05"){
#     print(
#       ggplot(df, aes(x=method, y=df[[i]], fill=method)) + geom_boxplot() +
#             guides(fill=FALSE) + ylab(i) + coord_flip() + scale_x_discrete(limits = rev(levels(df$method)))
#           )
#   }
#   ## violin plot median
#  print(
#    ggplot(df, aes(x=prop, y=df[[i]], fill=method))  + geom_violin(draw_quantiles = 0.5) + facet_wrap(~method) +
#     guides(fill=FALSE) + ylab(i)
#   ) 
#   ## mean curves plot
#   if(i!="FP.atFDR05"){
#     print(
#       ggplot(df, aes(x=as.integer(df$prop), y=df[[i]], color=method)) + geom_smooth(alpha=.1) + scale_x_continuous(labels=unique(props.all)) + 
#         ylab(i) + xlab("prop")
#       )
#   }
# }

Pierre analysis

Effect of mirexpr

##     
##      amin bartel cherone jeong  rat
##   -1    0      2       0     0   24
##   0   188   5187     380     0  343
##   1     2    131       0     0   13
## 
##   -1    0    1 
##   26 6098  146
##                     amin                amin.exon        amin.exon.mirexpr 
##                        0                        0                        0 
##             amin.mirexpr               bartel.hek          bartel.hek.exon 
##                        0                        0                        0 
##  bartel.hek.exon.mirexpr       bartel.hek.mirexpr              bartel.hela 
##                        0                        0                        0 
##         bartel.hela.exon bartel.hela.exon.mirexpr      bartel.hela.mirexpr 
##                        0                        0                        2 
##               cherone.d0          cherone.d0.exon  cherone.d0.exon.mirexpr 
##                        0                        0                        0 
##       cherone.d0.mirexpr               cherone.d1          cherone.d1.exon 
##                        0                        0                        0 
##  cherone.d1.exon.mirexpr       cherone.d1.mirexpr                    jeong 
##                        0                        0                        0 
##               jeong.exon                      rat                 rat.exon 
##                        0                        0                        0 
##         rat.exon.mirexpr              rat.mirexpr 
##                        0                       24
##     
##      aREAmir aREAmir2 combFish.1 combFish.2 combFish.3 combGeom.1 combGeom.2
##   -1       0        0          0          2          3          1          0
##   0      317      323        330        328        326        328        330
##   1       13        7          0          0          1          1          0
##     
##      combGeom.3  EN GSEA  KS KS2 michael modScore modSites  MW regmir regmirb
##   -1          1   2    2   1   3       3        0        0   0      6       0
##   0         327 326  231 329 316     326      330      329 330    320     328
##   1           2   2   97   0  11       1        0        1   0      4       2
##     
##      wEN
##   -1   2
##   0  324
##   1    4

Effect of spliced-specific analysis

##     
##      amin bartel cherone jeong  rat
##   -1   23    744       0     0  214
##   0   155   4516     380   187  166
##   1    12     60       0     3    0
##     
##      amin amin.exon amin.exon.mirexpr amin.mirexpr bartel.hek bartel.hek.exon
##   -1    0        23                 0            0          0             454
##   0     0       155                 0            0          0            1817
##   1     0        12                 0            0          0               9
##     
##      bartel.hek.exon.mirexpr bartel.hek.mirexpr bartel.hela bartel.hela.exon
##   -1                       0                  0           0              290
##   0                        0                  0           0             2699
##   1                        0                  0           0               51
##     
##      bartel.hela.exon.mirexpr bartel.hela.mirexpr cherone.d0 cherone.d0.exon
##   -1                        0                   0          0               0
##   0                         0                   0          0             190
##   1                         0                   0          0               0
##     
##      cherone.d0.exon.mirexpr cherone.d0.mirexpr cherone.d1 cherone.d1.exon
##   -1                       0                  0          0               0
##   0                        0                  0          0             190
##   1                        0                  0          0               0
##     
##      cherone.d1.exon.mirexpr cherone.d1.mirexpr jeong jeong.exon  rat rat.exon
##   -1                       0                  0     0          0    0      214
##   0                        0                  0     0        187    0      166
##   1                        0                  0     0          3    0        0
##     
##      rat.exon.mirexpr rat.mirexpr
##   -1                0           0
##   0                 0           0
##   1                 0           0
##     
##      aREAmir aREAmir2 combFish.1 combFish.2 combFish.3 combGeom.1 combGeom.2
##   -1      70       72         20         27         56         43         50
##   0      270      267        320        313        284        297        290
##   1        0        1          0          0          0          0          0
##     
##      combGeom.3  EN GSEA  KS KS2 michael modScore modSites  MW regmir regmirb
##   -1         64  42   78  11  61      48       31       37   9     28     188
##   0         276 298  262 329 275     289      309      292 331    256     152
##   1           0   0    0   0   4       3        0       11   0     56       0
##     
##      wEN
##   -1  46
##   0  294
##   1    0